Visualizations in R
Libraries
Unnecessary staff
load data
data <- read.csv("owid-covid-data.csv", stringsAsFactors = F)
data$date <- as.Date(data$date, "%Y-%m-%d")
datatable(head(data), caption = "Table 1: This is a simple caption for the table.",
class = "cell-border stripe", options = list(pageLength = 5))temp <- data.frame(data) %>% select(continent, location, gdp_per_capita, population_density,
total_cases, population)
temp <- subset(temp, location != "World" & location != "International")
data_agg <- aggregate(temp, list(temp$location, temp$continent, temp$gdp_per_capita,
temp$population_density), max)
data_agg$Group.1 <- NULL
data_agg$Group.2 <- NULL
data_agg$Group.3 <- NULL
data_agg$Group.4 <- NULL
head(data_agg) continent location gdp_per_capita population_density total_cases
1 Asia Mongolia 11840.846 1.980 1517
2 Africa Namibia 9541.808 3.078 30198
3 Oceania Australia 44648.710 3.202 28708
4 Europe Iceland 46482.958 3.404 NA
5 South America Suriname 13767.119 3.612 7469
6 Africa Libya 17881.509 3.623 NA
population
1 3278292
2 2540916
3 25499881
4 341250
5 586634
6 6871287
continent location gdp_per_capita population_density total_cases population
51 Africa Zimbabwe 1899.775 42.729 26881 14862927
data_sel <- data.frame(data) %>% select(continent, location, date, new_cases, new_deaths,
total_cases, total_deaths, population)
head(data_sel) continent location date new_cases new_deaths total_cases
1 Asia Afghanistan 2020-02-24 1 NA 1
2 Asia Afghanistan 2020-02-25 0 NA 1
3 Asia Afghanistan 2020-02-26 0 NA 1
4 Asia Afghanistan 2020-02-27 0 NA 1
5 Asia Afghanistan 2020-02-28 0 NA 1
6 Asia Afghanistan 2020-02-29 0 NA 1
total_deaths population
1 NA 38928341
2 NA 38928341
3 NA 38928341
4 NA 38928341
5 NA 38928341
6 NA 38928341
library(plotly)
a <- ggplot(data_agg) + geom_point(aes(x = total_cases, y = population, color = continent,
size = gdp_per_capita)) + labs(x = "Total Cases", y = "Population", title = "Covid total cases vs population",
caption = "source: ") + theme(panel.background = element_rect(fill = "grey",
color = "white", size = 1.2), plot.background = element_rect(fill = "grey"),
plot.title = element_text(hjust = 0.5, size = 16, face = "bold", color = "brown"),
plot.caption = element_text(face = "italic", color = "brown"), panel.grid.major = element_line(linetype = "dashed",
size = 0.1), panel.grid.minor = element_blank()) + scale_x_continuous(trans = "log10") +
scale_y_continuous(trans = "log10")
ggplotly(a)ggplot(data=data_agg, aes(x= total_cases, y = continent, fill=continent)) +
geom_bar(width = 0.5, stat = 'identity') +
labs(title = 'Total cases by continent.',
subtitle = 'subtitle') +
xlab('Total Cases') +
ylab('Continents') +
theme(legend.position = "none",
text = element_text(size=8),
axis.text.y = element_text(hjust=1)) +
geom_text(aes(label = total_cases), vjust =0.5, hjust=-0.03,
color = 'darkgreen', size = 3.5) +# since the bars are horizontal I put
# the labels next to the bars
theme(plot.title = element_text(size =12, face='bold',
color = 'brown'))ggplot(data = data_agg, aes(x = total_cases)) + geom_bar(stat = "count", fill = "darkgreen",
color = "darkgreen", aes(y = ..count..)) + theme_minimal() + geom_text(aes(label = ..count..),
stat = "count", vjust = -0.4)ggplot(iris, aes(x = Sepal.Width, fill = Species)) + geom_histogram(data = iris[,
-5], fill = "grey", alpha = 0.5, bins = 30) + geom_histogram(colour = "black",
bins = 30) + facet_wrap(~Species) + guides(fill = FALSE)ggplot(data_agg, aes(x = gdp_per_capita, fill = continent)) + geom_histogram(data = data_agg[,
-5], fill = "grey", alpha = 0.5, bins = 30) + geom_histogram(colour = "black",
bins = 30) + facet_wrap(~continent) + guides(fill = FALSE)# library(gganimate) library(gapminder) start_pause <- 0 end_pause <- 0
# ggplot(gapminder, aes(gdpPercap, lifeExp, size = pop, colour = country)) +
# geom_point(alpha = 0.7, show.legend = FALSE) + scale_colour_manual(values =
# country_colors) + scale_size(range = c(2, 12)) + scale_x_log10() +
# facet_wrap(~continent) + # Here comes the gganimate specific bits labs(title =
# 'Year: {frame_time}', x = 'GDP per capita', y = 'life expectancy') +
# transition_time(year) + ease_aes('linear') continent location date new_cases new_deaths total_cases
1 Asia Afghanistan 2020-02-24 1 NA 1
2 Asia Afghanistan 2020-02-25 0 NA 1
3 Asia Afghanistan 2020-02-26 0 NA 1
4 Asia Afghanistan 2020-02-27 0 NA 1
5 Asia Afghanistan 2020-02-28 0 NA 1
6 Asia Afghanistan 2020-02-29 0 NA 1
total_deaths population
1 NA 38928341
2 NA 38928341
3 NA 38928341
4 NA 38928341
5 NA 38928341
6 NA 38928341
continent location date new_cases new_deaths total_cases
1 North America Canada 2020-01-26 1 NA 1
2 North America Canada 2020-01-27 0 NA 1
3 North America Canada 2020-01-28 1 NA 2
4 North America Canada 2020-01-29 0 NA 2
5 North America Canada 2020-01-30 0 NA 2
6 North America Canada 2020-01-31 2 NA 4
total_deaths population
1 NA 37742157
2 NA 37742157
3 NA 37742157
4 NA 37742157
5 NA 37742157
6 NA 37742157
# data_anim <- data_sel %>% filter(location %in% c('Germany','Canada', 'Mexico',
# 'Italy', 'Spain', 'Poland'))
data_anim <- data_sel %>% filter(continent %in% c("africa", "Asia", "Europe", "North America",
"South America", "Oceania"))
data_anim <- data_anim[complete.cases(data_anim), ]
nice <- ggplot(data_anim, aes(total_cases, total_deaths, size = population, colour = continent)) +
geom_point(alpha = 0.7, show.legend = FALSE) + scale_size(range = c(2, 12)) +
scale_x_log10() + facet_wrap(~continent) + # Here comes the gganimate specific bits
labs(title = "Year: {frame_time}", x = "Total Cases", y = "Total Deaths") + transition_time(date) +
ease_aes("linear")
anim_save("nice.gif", nice)for maps:
library(tidyverse)
library(rnaturalearth)
library(cowplot)
library(sf)
library(ggmap)
library(leaflet)
library("rnaturalearth")
library("rnaturalearthdata")
library("sf")
library("rgeos")
###
library(tidyverse)
library(ggplot2)
library(readr)
library(maps)
library(viridis)world <- ne_countries(scale = "medium", returnclass = "sf")
world <- data.frame(world)
world <- world %>% rename(location = name)
head(world) scalerank featurecla labelrank sovereignt sov_a3 adm0_dif level type
0 3 Admin-0 country 5 Netherlands NL1 1 2 Country
admin adm0_a3 geou_dif geounit gu_a3 su_dif subunit su_a3 brk_diff location
0 Aruba ABW 0 Aruba ABW 0 Aruba ABW 0 Aruba
name_long brk_a3 brk_name brk_group abbrev postal formal_en formal_fr
0 Aruba ABW Aruba <NA> Aruba AW Aruba <NA>
note_adm0 note_brk name_sort name_alt mapcolor7 mapcolor8 mapcolor9
0 Neth. <NA> Aruba <NA> 4 2 2
mapcolor13 pop_est gdp_md_est pop_year lastcensus gdp_year
0 9 103065 2258 NA 2010 NA
economy income_grp wikipedia fips_10 iso_a2 iso_a3
0 6. Developing region 2. High income: nonOECD NA <NA> AW ABW
iso_n3 un_a3 wb_a2 wb_a3 woe_id adm0_a3_is adm0_a3_us adm0_a3_un adm0_a3_wb
0 533 533 AW ABW NA ABW ABW NA NA
continent region_un subregion region_wb name_len long_len
0 North America Americas Caribbean Latin America & Caribbean 5 5
abbrev_len tiny homepart geometry
0 5 4 NA MULTIPOLYGON (((-69.89912 1...
[ reached 'max' / getOption("max.print") -- omitted 5 rows ]
continent location gdp_per_capita population_density total_cases
1 Asia Mongolia 11840.846 1.980 1517
2 Africa Namibia 9541.808 3.078 30198
3 Oceania Australia 44648.710 3.202 28708
4 Europe Iceland 46482.958 3.404 NA
5 South America Suriname 13767.119 3.612 7469
6 Africa Libya 17881.509 3.623 NA
population
1 3278292
2 2540916
3 25499881
4 341250
5 586634
6 6871287
# Default theme:
theme_set(theme_bw())
world <- merge(x = world, y = data_agg, by = "location", all.x = TRUE)
head(world) location scalerank featurecla labelrank sovereignt sov_a3 adm0_dif
1 Afghanistan 1 Admin-0 country 3 Afghanistan AFG 0
level type admin adm0_a3 geou_dif geounit gu_a3 su_dif
1 2 Sovereign country Afghanistan AFG 0 Afghanistan AFG 0
subunit su_a3 brk_diff name_long brk_a3 brk_name brk_group abbrev
1 Afghanistan AFG 0 Afghanistan AFG Afghanistan <NA> Afg.
postal formal_en formal_fr note_adm0 note_brk name_sort
1 AF Islamic State of Afghanistan <NA> <NA> <NA> Afghanistan
name_alt mapcolor7 mapcolor8 mapcolor9 mapcolor13 pop_est gdp_md_est
1 <NA> 5 6 8 7 28400000 22270
pop_year lastcensus gdp_year economy income_grp
1 NA 1979 NA 7. Least developed region 5. Low income
wikipedia fips_10 iso_a2 iso_a3 iso_n3 un_a3 wb_a2 wb_a3 woe_id adm0_a3_is
1 NA <NA> AF AFG 004 004 AF AFG NA AFG
adm0_a3_us adm0_a3_un adm0_a3_wb continent.x region_un subregion
1 AFG NA NA Asia Asia Southern Asia
region_wb name_len long_len abbrev_len tiny homepart
1 South Asia 11 11 4 NA 1
geometry continent.y gdp_per_capita population_density
1 MULTIPOLYGON (((74.89131 37... Asia 1803.987 54.422
total_cases population
1 53938 38928341
[ reached 'max' / getOption("max.print") -- omitted 5 rows ]
# ggplot(data = world) + geom_sf(aes(fill = income_grp )) + coord_sf(expand =
# FALSE) + # geom_sf(aes(fill = pop_est)) + scale_fill_brewer(palette = 'YlOrBr')
# + # scale_fill_viridis_c(option = 'plasma', trans = 'sqrt', guide =
# guide_colorbar(barwidth = 30), name = NULL) + theme(legend.position = 'right')
# # theme(legend.position = 'bottom')# A tibble: 6 x 368
`Province/State` `Country/Region` Lat Long `1/22/20` `1/23/20` `1/24/20`
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 <NA> Afghanistan 33.9 67.7 0 0 0
2 <NA> Albania 41.2 20.2 0 0 0
3 <NA> Algeria 28.0 1.66 0 0 0
4 <NA> Andorra 42.5 1.52 0 0 0
5 <NA> Angola -11.2 17.9 0 0 0
6 <NA> Antigua and Bar~ 17.1 -61.8 0 0 0
# ... with 361 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>,
# `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
# `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
# `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
# `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
# `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
# `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
# `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
# `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>,
# `2/28/20` <dbl>, `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>,
# `3/3/20` <dbl>, `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>,
# `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>,
# `3/11/20` <dbl>, `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>,
# `3/15/20` <dbl>, `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>,
# `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>,
# `3/23/20` <dbl>, `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>,
# `3/27/20` <dbl>, `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>,
# `3/31/20` <dbl>, `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>,
# `4/4/20` <dbl>, `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>,
# `4/8/20` <dbl>, `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>,
# `4/12/20` <dbl>, `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>,
# `4/16/20` <dbl>, `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>,
# `4/20/20` <dbl>, `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>,
# `4/24/20` <dbl>, `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>,
# `4/28/20` <dbl>, `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>,
# `5/2/20` <dbl>, `5/3/20` <dbl>, ...
world <- map_data("world")
# cutoffs based on the number of cases
mybreaks <- c(1, 20, 100, 1000, 50000)
ggplot() + geom_polygon(data = world, aes(x = long, y = lat, group = group), fill = "grey",
alpha = 0.3) + geom_point(data = datacov, aes(x = Long, y = Lat, size = `3/3/20`,
color = `3/3/20`), stroke = F, alpha = 0.7) + scale_size_continuous(name = "Cases",
trans = "log", range = c(1, 7), breaks = mybreaks, labels = c("1-19", "20-99",
"100-999", "1,000-49,999", "50,000+")) + # scale_alpha_continuous(name='Cases', trans='log', range=c(0.1,
# 0.9),breaks=mybreaks) +
scale_color_viridis_c(option = "inferno", name = "Cases", trans = "log", breaks = mybreaks,
labels = c("1-19", "20-99", "100-999", "1,000-49,999", "50,000+")) + theme_void() +
guides(colour = guide_legend()) + labs(caption = "Data Repository provided by Johns Hopkins CSSE. Visualization by DataScience+ ") +
theme(legend.position = "bottom", text = element_text(color = "#22211d"), plot.background = element_rect(fill = "#ffffff",
color = NA), panel.background = element_rect(fill = "#ffffff", color = NA),
legend.background = element_rect(fill = "#ffffff", color = NA))# remotes::install_github('GuangchuangYu/nCov2019') source:
# https://github.com/GuangchuangYu/shadowtext/blob/master/R/geom-shadowtext.R
# source: https://github.com/GuangchuangYu/nCov2019
library("remotes")
# remotes::install_github('GuangchuangYu/nCov2019', dependencies = TRUE)
library("nCov2019")
library(ggplot2)
y <- load_nCov2019(lang = "en")
d <- y["global"]
max_time <- max(d$time)
min_time <- max_time - 7
d <- na.omit(d[d$time >= min_time & d$time <= max_time, ])
dd <- d[d$time == max(d$time, na.rm = TRUE), ]
d$country <- factor(d$country, levels = unique(dd$country[order(dd$cum_confirm)]))
breaks = c(10, 100, 1000, 10000)
ggplot(d, aes(time, country)) + geom_tile(aes(fill = cum_confirm), color = "black") +
scale_fill_viridis_c(trans = "log", breaks = breaks, labels = breaks) + xlab(NULL) +
ylab(NULL) + scale_x_date(date_labels = "%Y-%m-%d") + theme_minimal()# require(nCov2019) y <- load_nCov2019(lang = 'en', source='github') d =
# y['global'] require(dplyr) dd <- filter(d, time == time(y)) %>%
# arrange(desc(cum_confirm)) dd = dd[1:40, ] dd$country = factor(dd$country,
# levels=dd$country) dd$angle = 1:40 * 360/40 require(ggplot2) p <- ggplot(dd,
# aes(country, cum_confirm, fill=cum_confirm)) + geom_col(width=1,
# color='grey90') + geom_col(aes(y=I(5)), width=1, fill='grey90', alpha = .2) +
# geom_col(aes(y=I(3)), width=1, fill='grey90', alpha = .2) +
# geom_col(aes(y=I(2)), width=1, fill = 'white') + scale_y_log10() +
# scale_fill_gradientn(colors=c('darkgreen', 'green', 'orange',
# 'firebrick','red'), trans='log') + geom_text(aes(label=paste(country,
# cum_confirm, sep='\n'), y = cum_confirm *.8, angle=angle), data=function(d)
# d[d$cum_confirm > 700,], size=3, color = 'white', fontface='bold', vjust=1) +
# geom_text(aes(label=paste0(cum_confirm, ' cases ', country), y =
# max(cum_confirm) * 2, angle=angle+90), data=function(d) d[d$cum_confirm <
# 700,], size=3, vjust=0) + coord_polar(direction=-1) + theme_void() +
# theme(legend.position='none') + ggtitle('COVID19 global trend', time(y))
# print(p)Including Plots
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.